First, we need to associate the ecomorph to the species we have. This information is in the original .csv that Isabel sent us with the information of every sample. Ignasi compiled the information on the ecomorphs from different references, creating the following summary table.
library(knitr)
#our information
MORPH <- read.table("/gata/mar/cophyhopa/results/2022-12-23/eco_sp.txt", col.names = c("ecomorph", "species"))
kable(MORPH)
| ecomorph | species |
|---|---|
| Albock | acrinasus |
| Balchen1 | alpinus |
| Balchen1/Balchen2 | alpinus/steinmanni |
| Balchen2 | brienzii |
| Balchen2/Felchen | brienzii/fatioi |
| Balchen2 | steinmanni |
| Bondelle | confusus |
| Brienzlig | albellus |
| - | duplex |
| Felchen/Brienzlig | fatioi/albellus |
| Felchen | fatioi |
| - | heglingus |
| Kropfer | profundus |
| - | zuerichensis |
| L | LAN_L |
| P | LAN_P |
| D | LAN_D |
| L | SUO_L |
| P | SUO_P |
#ignasi table
ECOM <- read.table("/gata/mar/cophyhopa/data/ecomorphs.txt", header = TRUE)
kable(ECOM)
| Lake | Species | Ecomorph |
|---|---|---|
| Brienz-Thun | C.albellus | Albeli |
| Brienz-Thun | C.fatioi | Felchen |
| Brienz | C.brienzii | Felchen |
| Brienz-Thun | C.alpinus | Balchen |
| Thun | C.steinmanni | Balchen |
| Thun | C.profundus | Benthicprofundal |
| Constance | C.arenincolus | Balchen |
| Constance | C.macrophthalmus | Felchen |
| Thun | C.acrinasus | Largepelagic |
| Constance | C.wartmanni | Largepelagic |
| Luzern | C.suspensus | Largepelagic |
| Luzern | C.nobilis | Pelagicprofundal |
| Luzern | C.muelleri | Albeli |
| Luzern | C.litoralis | Balchen |
| Luzern | C.sp.Alpnacherfelchen | Felchen |
| Luzern | C.intermundia | Felchen |
| Walen-Zurich | C.duplex | Balchen |
| Zurich | C.zuerichensis | Felchen |
| Neuchatel | C.candidus | Albeli |
| Biel | C.confusus | Albeli |
| Biel-Neuchatel | C.palaea | Balchen |
| Drewitz | C.holsatus | Balchen |
| Muddus | C.lavaretus | NA |
| Shaal | C.maraenoides | Albeli |
| Langfjordvatn | C.lavaretus_Lang_D | D |
| Langfjordvatn | C.lavaretus_Lang_L | L |
| Langfjordvatn | C.lavaretus_Lang_P | P |
| Suopatjavri | C.lavaretus_Suo_L | L |
| Suopatjavri | C.lavaretus_Suo_P | P |
Using the information from both tables (and further considering the information provided by Ignasi’s references), we choose SUO_P and SUO_L (SUO lake), LAN_L and LAN_P (LAN lake) for the lakes in Norway. But for alpine lakes, the thing is that most of them are connected, so species are present in different lakes. We chose the species C.fatioi (Felchen ecomorph) present in Lakes Thun and Brienz, C.steinmani (Balchen ecomorph) present in Lake Thun, C.duplex (Balchen ecomorph) and C.zuerichensis (Felchen ecomorph) present in lakes Walen and Zurich, C.albellus (Albeli ecomorph) present in Lakes Thun and Brienz and, finally, C.confusus (Albeli ecomorph) present in Lake Bienne. So, we would be comparing Thun-Brienz vs. Walen-Zurich and Thun-Brienz v. Bienne for the alpine region.
Then the lake that shares the same ecomorph with another lake is taken into account for the next round of Fst stats to be calculated. To do that, we need to create a txt file with the names of each individual in a lake that shares the same ecomorph.
I am going to take the txt files previously created in the 2022-12-12 folder with the individuals we need: LAN_L, LAN_P, SUO_P, SUO_L, C.fatioi, C.steinmani, C.duplex, C.zuerichensis, C.albellus and C.confusus.
Ecomorph = c("Albeli", "Felchen", "Balchen", "P (Profundal)", "L (Litoral)")
Species = c("C.albellus (Thun-Brienz) / C.confusus (Bienne)", "C.fatioi (Thun-Brienz) / C.zuerichensis (Walen-Zurich)",
"C.steinmani (Thun) / C.duplex (Walen-Zurich)", "P (LAN) / P (SUO)", "L (LAN) / L (SUO)")
SUM <- data.frame(Ecomorph, Species)
kable(SUM)
| Ecomorph | Species |
|---|---|
| Albeli | C.albellus (Thun-Brienz) / C.confusus (Bienne) |
| Felchen | C.fatioi (Thun-Brienz) / C.zuerichensis (Walen-Zurich) |
| Balchen | C.steinmani (Thun) / C.duplex (Walen-Zurich) |
| P (Profundal) | P (LAN) / P (SUO) |
| L (Litoral) | L (LAN) / L (SUO) |
VCF='/gata/mar/cophyhopa/results/2022-04-22/assem2_outfiles/assem2.vcf'
if [ ! -e dirty.recode.vcf ]; then
vcftools --vcf $VCF \
--out dirty \
--remove ~/cophyhopa/results/2022-05-04/TheWorst.txt \
--recode \
--recode-INFO-all
fi
if [ ! -e FST/Felchen_Alpine.weir.fst ]; then
vcftools --vcf dirty.recode.vcf \
--out ./FST/Felchen_Alpine \
--thin 500 \
--remove-indels \
--mac 2 \
--max-alleles 2 \
--max-meanDP 1000 \
--max-missing 1 \
--weir-fst-pop C.fatioi.txt \
--weir-fst-pop C.zuerichensis.txt
fi
if [ ! -e FST/Felchen_Alpine_w.windowed.weir.fst ]; then
vcftools --vcf dirty.recode.vcf \
--out ./FST/Felchen_Alpine_w \
--thin 500 \
--remove-indels \
--mac 2 \
--max-alleles 2 \
--max-meanDP 1000 \
--max-missing 1 \
--weir-fst-pop C.fatioi.txt \
--weir-fst-pop C.zuerichensis.txt \
--fst-window-size 250000 \
--fst-window-step 50000
fi
if [ ! -e FST/Balchen_Alpine.weir.fst ]; then
vcftools --vcf dirty.recode.vcf \
--out ./FST/Balchen_Alpine \
--thin 500 \
--remove-indels \
--mac 2 \
--max-alleles 2 \
--max-meanDP 1000 \
--max-missing 1 \
--weir-fst-pop C.steinmanni.txt \
--weir-fst-pop C.duplex.txt
fi
if [ ! -e FST/Balchen_Alpine_w.windowed.weir.fst ]; then
vcftools --vcf dirty.recode.vcf \
--out ./FST/Balchen_Alpine_w \
--thin 500 \
--remove-indels \
--mac 2 \
--max-alleles 2 \
--max-meanDP 1000 \
--max-missing 1 \
--weir-fst-pop C.steinmanni.txt \
--weir-fst-pop C.duplex.txt \
--fst-window-size 250000 \
--fst-window-step 50000
fi
if [ ! -e FST/Albeli_Alpine.weir.fst ]; then
vcftools --vcf dirty.recode.vcf \
--out ./FST/Albeli_Alpine \
--thin 500 \
--remove-indels \
--mac 2 \
--max-alleles 2 \
--max-meanDP 1000 \
--max-missing 1 \
--weir-fst-pop C.albellus.txt \
--weir-fst-pop C.confusus.txt
fi
if [ ! -e FST/Albeli_Alpine_w.windowed.weir.fst ]; then
vcftools --vcf dirty.recode.vcf \
--out ./FST/Albeli_Alpine_w \
--thin 500 \
--remove-indels \
--mac 2 \
--max-alleles 2 \
--max-meanDP 1000 \
--max-missing 1 \
--weir-fst-pop C.albellus.txt \
--weir-fst-pop C.confusus.txt \
--fst-window-size 250000 \
--fst-window-step 50000
fi
if [ ! -e FST/P_Arctic.weir.fst ]; then
vcftools --vcf dirty.recode.vcf \
--out ./FST/P_Arctic \
--thin 500 \
--remove-indels \
--mac 2 \
--max-alleles 2 \
--max-meanDP 1000 \
--max-missing 1 \
--weir-fst-pop LAN_P.txt \
--weir-fst-pop SUO_P.txt
fi
if [ ! -e FST/P_Arctic_w.windowed.weir.fst ]; then
vcftools --vcf dirty.recode.vcf \
--out ./FST/P_Arctic_w \
--thin 500 \
--remove-indels \
--mac 2 \
--max-alleles 2 \
--max-meanDP 1000 \
--max-missing 1 \
--weir-fst-pop LAN_P.txt \
--weir-fst-pop SUO_P.txt \
--fst-window-size 250000 \
--fst-window-step 50000
fi
if [ ! -e FST/L_Arctic.weir.fst ]; then
vcftools --vcf dirty.recode.vcf \
--out ./FST/L_Arctic \
--thin 500 \
--remove-indels \
--mac 2 \
--max-alleles 2 \
--max-meanDP 1000 \
--max-missing 1 \
--weir-fst-pop LAN_L.txt \
--weir-fst-pop SUO_L.txt
fi
if [ ! -e FST/L_Arctic_w.windowed.weir.fst ]; then
vcftools --vcf dirty.recode.vcf \
--out ./FST/L_Arctic_w \
--thin 500 \
--remove-indels \
--mac 2 \
--max-alleles 2 \
--max-meanDP 1000 \
--max-missing 1 \
--weir-fst-pop LAN_L.txt \
--weir-fst-pop SUO_L.txt \
--fst-window-size 250000 \
--fst-window-step 50000
fi
library(ggplot2)
#setwd("~/cophyhopa/results/2022-12-23")
FELCHEN <- read.table("./FST/Felchen_Alpine.weir.fst", header = TRUE)
CHR <- as.character(unique(FELCHEN$CHROM))
ggplot(data = FELCHEN,
mapping = aes(x = CHROM, y = WEIR_AND_COCKERHAM_FST)) +
geom_boxplot() +
theme(axis.text.x = element_text(angle = 45, hjust = 1, vjust = 1))
for (i in 1:length(CHR)) {
print(ggplot(data = FELCHEN[which(FELCHEN$CHROM == CHR[i]), ],
mapping = aes(x = POS, y = WEIR_AND_COCKERHAM_FST)) +
geom_point() +
theme(axis.text.x = element_text(angle = 45, hjust = 1, vjust = 1)) +
ggtitle(CHR[i]))
}
library(ggplot2)
setwd("~/cophyhopa/results/2022-12-23")
BALCHEN <- read.table("./FST/Balchen_Alpine.weir.fst", header = TRUE)
CHR <- as.character(unique(BALCHEN$CHROM))
ggplot(data = BALCHEN,
mapping = aes(x = CHROM, y = WEIR_AND_COCKERHAM_FST)) +
geom_boxplot() +
theme(axis.text.x = element_text(angle = 45, hjust = 1, vjust = 1))
for (i in 1:length(CHR)) {
print(ggplot(data = BALCHEN[which(BALCHEN$CHROM == CHR[i]), ],
mapping = aes(x = POS, y = WEIR_AND_COCKERHAM_FST)) +
geom_point() +
theme(axis.text.x = element_text(angle = 45, hjust = 1, vjust = 1)) +
ggtitle(CHR[i]))
}
library(ggplot2)
setwd("~/cophyhopa/results/2022-12-23")
ALBELI <- read.table("./FST/Albeli_Alpine.weir.fst", header = TRUE)
CHR <- as.character(unique(ALBELI$CHROM))
ggplot(data = ALBELI,
mapping = aes(x = CHROM, y = WEIR_AND_COCKERHAM_FST)) +
geom_boxplot() +
theme(axis.text.x = element_text(angle = 45, hjust = 1, vjust = 1))
for (i in 1:length(CHR)) {
print(ggplot(data = ALBELI[which(ALBELI$CHROM == CHR[i]), ],
mapping = aes(x = POS, y = WEIR_AND_COCKERHAM_FST)) +
geom_point() +
theme(axis.text.x = element_text(angle = 45, hjust = 1, vjust = 1)) +
ggtitle(CHR[i]))
}
library(ggplot2)
setwd("~/cophyhopa/results/2022-12-23")
PROF <- read.table("./FST/P_Arctic.weir.fst", header = TRUE)
CHR <- as.character(unique(FELCHEN$CHROM))
ggplot(data = PROF,
mapping = aes(x = CHROM, y = WEIR_AND_COCKERHAM_FST)) +
geom_boxplot() +
theme(axis.text.x = element_text(angle = 45, hjust = 1, vjust = 1))
for (i in 1:length(CHR)) {
print(ggplot(data = PROF[which(PROF$CHROM == CHR[i]), ],
mapping = aes(x = POS, y = WEIR_AND_COCKERHAM_FST)) +
geom_point() +
theme(axis.text.x = element_text(angle = 45, hjust = 1, vjust = 1)) +
ggtitle(CHR[i]))
}
library(ggplot2)
setwd("~/cophyhopa/results/2022-12-23")
LITO <- read.table("./FST/L_Arctic.weir.fst", header = TRUE)
CHR <- as.character(unique(FELCHEN$CHROM))
ggplot(data = LITO,
mapping = aes(x = CHROM, y = WEIR_AND_COCKERHAM_FST)) +
geom_boxplot() +
theme(axis.text.x = element_text(angle = 45, hjust = 1, vjust = 1))
for (i in 1:length(CHR)) {
print(ggplot(data = LITO[which(LITO$CHROM == CHR[i]), ],
mapping = aes(x = POS, y = WEIR_AND_COCKERHAM_FST)) +
geom_point() +
theme(axis.text.x = element_text(angle = 45, hjust = 1, vjust = 1)) +
ggtitle(CHR[i]))
}